# Explore topic 13d
# First load all of the functions we will use
source("../gnrnd5.R")
source("../gnrnd4.R")
source("../ci_prop.R")

# Topic 13d looks at generating a confidence
# interval for a population proportion
#
# Let us generate a population
gnrnd5(33658499907, 458465)
# the generated values are picked up from L1;
# put the population into big_pop
big_pop <- L1
# We can look at some of the population
head(big_pop, 100)

# Then, our interest is to get a 92% confidence interval
# for the proportion of 3's in the population.
# To do this we will take a sample
# of a certain size
samp_size <- 93
# then, just so that we can all get the same
# sample, generate the index values for
# a sample of that size
key1 <- 702370001 + (samp_size - 1) * 100
gnrnd4(key1, 500000001)
# L1 now holds the index values; look at them and
# use them to pull the sample out of the population
L1
this_sample <- big_pop[L1]
# look at our sample
this_sample

# we can find the sample proportion of 3's
sample_counts <- table(this_sample)
sample_counts
# Read the count of 3's from the table rather than typing it in,
# so the script keeps working if samp_size is changed. As in the
# rest of this script, the 3's are taken to be the third entry of
# the table (this assumes every lower value shows up in the sample).
# For the default sample of 93 this count should be 30.
count_3 <- sample_counts[[3]]
samp_proport_3 <- count_3 / length(this_sample)
samp_proport_3

# Now, because we will use the sample proportion,
# if certain conditions are met, then we can
# use the normal approximation to the distribution
# of the proportions.
# the conditions are that n*p>=10 and n*(1-p)>=10
samp_size * samp_proport_3
samp_size * (1 - samp_proport_3)
# those both pass, so we can use the value
# sqrt( p*(1-p)/n ) for the standard deviation of
# our sample statistic, the standard error, in
# a normal population.
# Standard error of the sample proportion: sqrt( p*(1-p)/n )
st_error <- sqrt(samp_proport_3 * (1 - samp_proport_3) / samp_size)
st_error

# The sample proportion is our point estimate.
# Keep the confidence level in one place so that changing it
# here changes every value computed below.
conf_level <- 0.92
# For a 92% confidence interval we need 4% on the
# outside of each side of the interval
alpha_over2 <- (1 - conf_level) / 2
z_low <- qnorm(alpha_over2)
z_low
z_high <- qnorm(1 - alpha_over2)
z_high

# we can find
#    samp_proport_3 +/- z(alpha_over2)*st_error
# CI low value
samp_proport_3 + z_low * st_error
# CI high value
samp_proport_3 + z_high * st_error

##### or we could have found the margin of error
MOE <- z_high * st_error
MOE
# and then found the limits for the
# confidence interval
samp_proport_3 - MOE # the low end
samp_proport_3 + MOE # the high end

### of course all of this could be done via
# our ci_prop function. Its arguments, in order, are the
# sample size, the number of 3's in the sample, and the
# confidence level. The number of 3's is recovered from the
# sample proportion (30 for the default sample of 93).
ci_prop(samp_size, round(samp_size * samp_proport_3), conf_level)

##################################
# we could try this at a different confidence
# level. Just alter conf_level above and
# then run the subsequent lines to get the new values
####################################

# If we express the confidence level as a
# percent then we say that that percent of the
# confidence intervals that we generate
# using this methodology will contain the
# true proportion. That means, that at this point
# in running the script, I do not know if the
# 92% confidence interval that we generated,
# namely (0.2377, 0.4074 ) does or does not
# contain the true population proportion of 3's.
#
# Let us find the true proportion of 3's and see if it is
# in the interval.
item_count <- table(big_pop)
item_count
# the 3's are the third entry of the population table
true_proportion <- item_count[3] / length(big_pop)
true_proportion
# yes it is!
# This has been an illustration, but let us
# go through the process 10000 times and
# see how many intervals that we generate this
# way contain the true proportion

# first reset the confidence level and
# sample size just in case we want to change
# them later
conf_level <- 0.92
samp_size <- 93
n_trials <- 10000

# The item of interest is the third distinct value in the
# population, the same entry that true_proportion was taken from.
# Counting it by value, rather than by its position in each
# sample's table, keeps the count right (possibly 0) even when a
# sample happens to miss one of the population values.
target_item <- sort(unique(big_pop))[3]

# L3 records, for each trial, whether the interval caught the
# true proportion
L3 <- character(n_trials)
for (i in seq_len(n_trials)) {
  this_sample <- sample(big_pop, samp_size)
  this_count <- sum(this_sample == target_item, na.rm = TRUE)
  this_ci <- ci_prop(samp_size, this_count, conf_level)
  # the first two values returned by ci_prop are used as the
  # low and high ends of the interval
  if (this_ci[1] <= true_proportion && true_proportion <= this_ci[2]) {
    L3[i] <- "hit"
  } else {
    L3[i] <- "missed"
  }
}
# see how we did
table(L3)

#########
# if we want we can do this again and we
# can even change the values of conf_level
# and/or samp_size set above if we want.